package org.dataalgorithms.chap05.mapreduce;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
import java.io.IOException;

/** 
 * RelativeFrequencyDriver is the driver class for computing the relative frequency of words.
 *
 * @author Mahmoud Parsian
 *
 */
public class RelativeFrequencyDriver 
	extends Configured implements Tool {
	
	private static final Logger THE_LOGGER = Logger.getLogger(RelativeFrequencyDriver.class);

	/**
	 * Dispatches command-line arguments to the tool 
	 * by the ToolRunner.
	 */
	public static void main(String[] args) throws Exception {
		if (args.length != 3) {
			THE_LOGGER.warn("usage: <window> <input> <output>");
			System.exit(-1);
		}
		int status = ToolRunner.run(new RelativeFrequencyDriver(), args);
		System.exit(status);
	}


	public int run(String[] args) throws Exception {
		int neighborWindow = Integer.parseInt(args[0]);
		Path inputPath = new Path(args[1]);
		Path outputPath = new Path(args[2]);

		Job job = new Job(new Configuration(), "RelativeFrequencyDriver");
		job.setJarByClass(RelativeFrequencyDriver.class);
        job.setJobName("RelativeFrequencyDriver");

		// Delete the output directory if it exists already
		FileSystem.get(getConf()).delete(outputPath, true);

		job.getConfiguration().setInt("neighbor.window", neighborWindow);

		FileInputFormat.setInputPaths(job, inputPath);
		FileOutputFormat.setOutputPath(job, outputPath);

		// (key,value) generated by map()
		job.setMapOutputKeyClass(PairOfWords.class);
		job.setMapOutputValueClass(IntWritable.class);
		
		// (key,value) generated by reduce()
		job.setOutputKeyClass(PairOfWords.class);
		job.setOutputValueClass(DoubleWritable.class);

        job.setMapperClass(RelativeFrequencyMapper.class);
        job.setReducerClass(RelativeFrequencyReducer.class);
        job.setCombinerClass(RelativeFrequencyCombiner.class);
        job.setPartitionerClass(OrderInversionPartitioner.class);
        job.setNumReduceTasks(3);

		long startTime = System.currentTimeMillis();
		job.waitForCompletion(true);
		THE_LOGGER.info("Job Finished in milliseconds: " + (System.currentTimeMillis() - startTime));
		return 0;
	}

}
